from langchain.document_loaders import PyPDFLoader, MathpixPDFLoader, UnstructuredPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS


def normal_pdf_loader(path: str = "files/stackoverflow-about-Python.pdf") -> None:
    """Load a PDF with ``PyPDFLoader`` and print each resulting document.

    Args:
        path: Location of the PDF to load. Defaults to the sample file this
            demo was originally hard-coded to read, so calling the function
            with no arguments behaves as before.
    """
    loader = PyPDFLoader(path)
    # Print every document returned by load_and_split(), one per line.
    for page in loader.load_and_split():
        print(page)


def pdf_loader_with_ai(
    path: str = "files/stackoverflow-about-Python.pdf",
    query: str = "Stackoverflow about Python",
    k: int = 2,
) -> None:
    """Index a PDF in FAISS and print the documents most similar to a query.

    The PDF is loaded and split with ``PyPDFLoader``, the resulting documents
    are embedded with ``OpenAIEmbeddings`` into a FAISS index, and the top
    ``k`` matches for ``query`` are printed as ``<page>: <first 300 chars>``.

    Args:
        path: Location of the PDF to load. Defaults to the sample file.
        query: Text to search the index for.
        k: Number of matching documents to print.

    Note:
        ``OpenAIEmbeddings()`` is constructed with no arguments here, so it
        presumably picks up its API credentials from the environment --
        confirm against the LangChain documentation before running.
    """
    loader = PyPDFLoader(path)
    pages = loader.load_and_split()
    faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings())
    for doc in faiss_index.similarity_search(query, k=k):
        # Show the "page" metadata value plus a short preview of the text.
        print(f"{doc.metadata['page']}:", doc.page_content[:300])


def pdf_loader_with_math_pix(path: str = "files/stackoverflow-about-Python.pdf") -> None:
    """Load a PDF with ``MathpixPDFLoader`` and print the loaded documents.

    Args:
        path: Location of the PDF to load. Defaults to the sample file this
            demo was originally hard-coded to read.

    Note:
        The loader is built with only the file path, so any Mathpix
        credentials are presumably taken from the environment -- confirm
        against the LangChain documentation before running.
    """
    loader = MathpixPDFLoader(path)
    print(loader.load())


def pdf_loader_with_unstructured(path: str = "files/stackoverflow-about-Python.pdf") -> None:
    """Load a PDF with ``UnstructuredPDFLoader`` and print the loaded documents.

    Args:
        path: Location of the PDF to load. Defaults to the sample file this
            demo was originally hard-coded to read.
    """
    loader = UnstructuredPDFLoader(path)
    print(loader.load())
    

# Script entry point: when the file is run directly, only the Mathpix loader
# demo is executed; the other demo functions are defined but not called here.
if __name__ == '__main__':
    pdf_loader_with_math_pix()